#---------------------------------------------------------------------------------------------------------# 
#---------------------------------------------------------------------------------------------------------# 
# ------------------------------------        0. Max Joosten         ------------------------------------ #
#-------------------------------------   Last updated: 08.09.2023    -------------------------------------# 
#---------------------------------------------------------------------------------------------------------# 

# 0. Load relevant packages
library(tidyverse)   # Collection of useful packages (mostly dplyr)
library(haven)       # For reading foreign data (read_dta)
library(stargazer)   # For printing regression tables
library(sandwich)    # For adjusting standard errors
library(patchwork)   # For combining the plots in one figure
library(ggeffects)   # For visualizing the predicted values

# 0. Set working directory
setwd("your path name")
df <- read_stata("df_complete.dta")

# ------------------------------------          Table 1         ------------------------------------ #
# 1. Influence income 
# Stata code for same results: reg pp l_ipref_1 i.area i.year i.party spendchange [aweight=iwt1], vce(rob)
# Stata code for same results: reg pp l_ipref_2 i.area i.year i.party spendchange [aweight=iwt2], vce(rob)
# Stata code for same results: reg pp l_ipref_3 i.area i.year i.party spendchange [aweight=iwt3], vce(rob)
# Stata code for same results: reg pp l_ipref_1 l_ipref_2 l_ipref_3 i.area i.year i.party spendchange [aweight=iwt123], vce(rob)
m1 <- lm(pp ~ l_ipref_1 + as.factor(area) + as.factor(year) + as.factor(party) + spendchange, weights = iwt_1, data = df)
m2 <- lm(pp ~ l_ipref_2 + as.factor(area) + as.factor(year) + as.factor(party) + spendchange, weights = iwt_2, data = df)
m3 <- lm(pp ~ l_ipref_3 + as.factor(area) + as.factor(year) + as.factor(party) + spendchange, weights = iwt_3, data = df)
m4 <- lm(pp ~ l_ipref_1 + l_ipref_2 + l_ipref_3 + as.factor(area) + as.factor(party) + as.factor(year) + spendchange, weights = iwt123, data = df)

# Adjust standard errors
robust_se_m1 <- sqrt(diag(vcovHC(m1, type = "HC1")))
robust_se_m2 <- sqrt(diag(vcovHC(m2, type = "HC1")))
robust_se_m3 <- sqrt(diag(vcovHC(m3, type = "HC1")))
robust_se_m4 <- sqrt(diag(vcovHC(m4, type = "HC1")))

# Output
stargazer(m1, 
          m2,
          m3, 
          m4, 
          type = "text", 
          omit = c("party", "year", "area"),
          keep.stat=c("n", "adj.rsq"),
          #out = "../05_figures/table1.html",
          dep.var.labels = c("Party positions"),
          se = list(
            robust_se_m1, robust_se_m2, robust_se_m3, robust_se_m4),
          header=FALSE, 
          title = "Linear regression models of parties' spending positions",
          no.space = TRUE, 
          column.sep.width = "3pt", 
          font.size = "small", 
          notes = "All models include fixed effects for years, parties and policy areas.",
          covariate.labels=c("Lower income voter preferences", "Middle income voter preferences", "Higher income voter preferences", "Change in spending since previous election"),
          add.lines=list(c("Fixed effects", "Yes", "Yes", "Yes", "Yes")))

# ------------------------------------          Figure 2         ------------------------------------ #
# Estimate 10 steps from min to max value
seq(-0.7408013,0.740029079, length.out = 10)

# Estimate (2) Predicted values plot (individual models)
mydf1 <- ggpredict(m1, terms = c("l_ipref_1[-0.74080130, -0.57626459, -0.41172788, -0.24719117, -0.08265446,  0.08188224,  0.24641895,  0.41095566,  0.57549237,  0.74002908]"))
mydf2 <- ggpredict(m2, terms = c("l_ipref_2[-0.74080130, -0.57626459, -0.41172788, -0.24719117, -0.08265446,  0.08188224,  0.24641895,  0.41095566,  0.57549237,  0.74002908]"))
mydf3 <- ggpredict(m3, terms = c("l_ipref_3[-0.74080130, -0.57626459, -0.41172788, -0.24719117, -0.08265446,  0.08188224,  0.24641895,  0.41095566,  0.57549237,  0.74002908]"))

# Estimate (4) Predicted values plot (combined models)
mydf4 <- ggpredict(m4, terms = c("l_ipref_1[-0.74080130, -0.57626459, -0.41172788, -0.24719117, -0.08265446,  0.08188224,  0.24641895,  0.41095566,  0.57549237,  0.74002908]"))
mydf5 <- ggpredict(m4, terms = c("l_ipref_2[-0.74080130, -0.57626459, -0.41172788, -0.24719117, -0.08265446,  0.08188224,  0.24641895,  0.41095566,  0.57549237,  0.74002908]"))
mydf6 <- ggpredict(m4, terms = c("l_ipref_3[-0.74080130, -0.57626459, -0.41172788, -0.24719117, -0.08265446,  0.08188224,  0.24641895,  0.41095566,  0.57549237,  0.74002908]"))

# Plot figures (saved in an object to later combine in one output for the paper. Remove hashtags to include confidence intervals)
f1 <- ggplot() + 
  geom_line(data = mydf1, aes(x, predicted, linetype = "Low"), linewidth = 1.25) +
  geom_line(data = mydf2, aes(x, predicted, linetype = "Middle"), linewidth = 1.25) +
  geom_line(data = mydf3, aes(x, predicted, linetype = "High"), linewidth = 1.25) +
  theme_classic() + ylim(-0.5, 0.6) + ggtitle("Separate models")  +
  scale_linetype_discrete(breaks = c("Low", "Middle", "High"),
                          labels = c("Low", "Middle", "High"),
                          guide = "legend") +
  labs(x = "Voters' support", y = "Predicted parties' support", linetype="Income") + 
  theme(axis.text=element_text(size=18),
        axis.title=element_text(size=20),
        legend.text=element_text(size=16),
        legend.title = element_text(size = 20),
        plot.title = element_text(size = 20, face = "bold"),
        legend.key.width = unit(2,"cm"))

f2 <- ggplot() + 
  geom_line(data = mydf4, aes(x, predicted, linetype = "Low"), linewidth = 1.25) +
  geom_line(data = mydf5, aes(x, predicted, linetype = "Middle"), linewidth = 1.25) +
  geom_line(data = mydf6, aes(x, predicted, linetype = "High"), linewidth = 1.25) +
  theme_classic() + ylim(-0.5, 0.6) + ggtitle("Combined model")  +
  scale_linetype_discrete(breaks = c("Low", "Middle", "High"),
                          labels = c("Low", "Middle", "High"),
                          guide = "legend") +
  labs(x = "Voters' support", y = "", linetype="Income") + 
  theme(axis.text=element_text(size=18),
        axis.title=element_text(size=20),
        legend.text=element_text(size=16),
        legend.title = element_text(size = 20),
        plot.title = element_text(size = 20, face = "bold"),
        legend.key.width = unit(2,"cm"))

# Plot Figure 2
f1 + f2 + plot_layout(guides = "collect")
ggsave(filename = "../05_figures/figure2.png", width = 15, height = 5)

# Cleaning global environment
rm(list=setdiff(ls(), c("df")))

# ------------------------------------          Table 2         ------------------------------------ #
# 2. Adaptation income
# Stata version: reg ipref_1 pp l_ipref_1  i.area i.year i.party spendchange [aweight=iwt_1], vce(rob)
# Stata version: reg ipref_2 pp l_ipref_2  i.area i.year i.party spendchange [aweight=iwt_2], vce(rob)
# Stata version: reg ipref_3 pp l_ipref_3  i.area i.year i.party spendchange [aweight=iwt_3], vce(rob)
m5 <- lm(ipref_1 ~ pp + l_ipref_1 + as.factor(area) + as.factor(year) + as.factor(party) + spendchange, weights = iwt_1, data = df)
m6 <- lm(ipref_2 ~ pp + l_ipref_2 + as.factor(area) + as.factor(year) + as.factor(party) + spendchange, weights = iwt_2, data = df)
m7 <- lm(ipref_3 ~ pp + l_ipref_3 + as.factor(area) + as.factor(year) + as.factor(party) + spendchange, weights = iwt_3, data = df)

# Adjust standard errors
robust_se_m5 <- sqrt(diag(vcovHC(m5, type = "HC1")))
robust_se_m6 <- sqrt(diag(vcovHC(m6, type = "HC1")))
robust_se_m7 <- sqrt(diag(vcovHC(m7, type = "HC1")))

# Output
stargazer(m5, m6, m7,
          type = "html", 
          omit = c("party", "year", "area"),
          keep.stat=c("n", "adj.rsq"),  
          out = "../05_figures/table2.html",
          dep.var.labels = c("Lower income", "Middle income", "Higher income"),
          se = list(robust_se_m5, robust_se_m6, robust_se_m7),
          header=FALSE,
          title = "Linear regression models of the public's spending preferences",
          no.space = TRUE,
          column.sep.width = "3pt",
          font.size = "small", 
          notes = "All models include fixed effects for years, parties and policy areas.",
          covariate.labels=c("Party positions", "Lagged lower income", "Lagged middle income", "Lagged high income", "Change in spending since previous election"),
          add.lines=list(c("Fixed effects", "Yes", "Yes", "Yes")))

# ------------------------------------          Figure 3         ------------------------------------ #
# Estimate (2) Predicted values plot (individual models)
mydf5 <- ggpredict(m5, terms = c("pp"))
mydf6 <- ggpredict(m6, terms = c("pp"))
mydf7 <- ggpredict(m7, terms = c("pp"))

# Plot Figure 3 (Remove hashtags to include confidence intervals)
ggplot() + 
  geom_line(data = mydf7, aes(x, predicted, linetype = "Low"), size = 1.25) +
  geom_line(data = mydf8, aes(x, predicted, linetype = "Middle"), size = 1.25) +
  geom_line(data = mydf9, aes(x, predicted, linetype = "High"), size = 1.25) +
  theme_classic() +
  labs(title = "Separate models",x = "Parties' support", y = "Predicted voters' support", linetype="Income")  +
  scale_linetype_discrete(breaks = c("Low", "Middle", "High"),
                          labels = c("Low", "Middle", "High"),
                          guide = "legend") + 
  theme(axis.text=element_text(size=20),
        axis.title=element_text(size=20),
        legend.text=element_text(size=16),
        legend.title = element_text(size = 20),
        plot.title = element_text(size = 20, face = "bold"),
        legend.key.width = unit(2,"cm")) 

ggsave(filename = "../05_figures/figure3.png", width = 7.5, height = 5)

# Cleaning global environment
rm(list=setdiff(ls(), c("df")))

# ------------------------------------          Table 3         ------------------------------------ #
# 3. Influence of income at t-2 and t
# Stata code for same results: reg pp l_ipref_1 l_ipref_2 l_ipref_3 i.area i.year i.party spendchange [aweight=iwt123], vce(rob)
m8 <- lm(pp ~ l2_ipref_1 + l2_ipref_2 + l2_ipref_3 + as.factor(area) + as.factor(party) + as.factor(year) + spendchange, weights = iwt123, data = df)
m9 <- lm(pp ~ ipref_1 + ipref_2 + ipref_3 + as.factor(area) + as.factor(party) + as.factor(year) + spendchange, weights = iwt123, data = df)

# Adjust standard errors
robust_se_m8 <- sqrt(diag(vcovHC(m8, type = "HC1")))
robust_se_m9 <- sqrt(diag(vcovHC(m9, type = "HC1")))

# Output
stargazer(m8, m9,  
          type = "html", 
          omit = c("party", "year", "area"),
          keep.stat=c("n", "adj.rsq"),
          out = "../05_figures/table3.html",
          dep.var.labels = c("Party positions"),
          se = list(robust_se_m8, robust_se_m9),
          header=FALSE, 
          title = "Linear regression models of parties' spending positions",
          no.space = TRUE, 
          column.sep.width = "3pt", 
          font.size = "small", 
          notes = "All models include fixed effects for years, parties and policy areas.",
          covariate.labels=c("Lower income voter preferences (t-2)", "Middle income voter preferences (t-2)", "Higher income voter preferences (t-2)", 
                             "Lower income voter preferences (t+1)", "Middle income voter preferences (t+1)", "Higher income voter preferences (t+1)", "Change in spending since previous election"),
          add.lines=list(c("Fixed effects", "Yes", "Yes", "Yes", "Yes")))